In [1]:
# Data handling and plotting.
import pandas as pd
import matplotlib.pyplot as plt
# Word-cloud rendering plus its built-in stopword list.
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# NLTK: VADER sentiment scoring, English stopwords, Snowball stemming.
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
import string
import re
# Fetch the stopword corpus if missing (no-op when already downloaded).
nltk.download('stopwords')
# Stemmer used later by clean() to reduce words to their stems.
stemmer=nltk.SnowballStemmer("english")
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Rakesh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
In [2]:
# Load the TikTok Google Play reviews dataset.
# NOTE(review): hardcoded absolute Windows path — consider a configurable
# DATA_DIR so the notebook runs on other machines.
data=pd.read_csv('C:/Users/Rakesh/Datasets/tiktok_google_play_reviews.csv')
In [3]:
# Preview the first five reviews with all raw columns.
data.head()
Out[3]:
reviewId userName userImage content score thumbsUpCount reviewCreatedVersion at replyContent repliedAt
0 gp:AOqpTOHRz-11c0apHLSKHHp52FxUXsQS9Z88wP3sWc5... MR LOL GAMER https://play-lh.googleusercontent.com/a/AATXAJ... Good 5 0 23.8.4 2022-04-05 23:18:30 NaN NaN
1 gp:AOqpTOF6mFDEkIypmyT3shDLjPHg8zB3kdns2W36ahp... Dino Kljako https://play-lh.googleusercontent.com/a-/AOh14... Awesome app! Too many people on it where it's ... 5 0 NaN 2022-04-05 23:18:21 NaN NaN
2 gp:AOqpTOGtqU4sb8vuVo3-eB7kIXWoBn-0YCUZ1SnPRKS... Olivia Harding https://play-lh.googleusercontent.com/a/AATXAJ... Not bad 5 0 23.9.5 2022-04-05 23:17:34 NaN NaN
3 gp:AOqpTOFHDm-Qa5R6jCpOGTFT2qr1_PKbCTbBNPahCEn... Keli We https://play-lh.googleusercontent.com/a-/AOh14... It is good 2 0 22.2.5 2022-04-05 23:17:04 NaN NaN
4 gp:AOqpTOFB6Ndao8IHRpOJRmbSknwMGxHcwYzux93YyXI... Mavis Kotoka https://play-lh.googleusercontent.com/a/AATXAJ... Very interesting app 5 0 22.1.5 2022-04-05 23:17:04 NaN NaN
In [4]:
# Keep only the review text and its star rating; other columns are unused.
data=data[['content','score']]
In [5]:
# Confirm the slimmed-down frame.
data.head()
Out[5]:
content score
0 Good 5
1 Awesome app! Too many people on it where it's ... 5
2 Not bad 5
3 It is good 2
4 Very interesting app 5
In [6]:
# Count missing values per column (output shows 4 reviews with no text).
data.isnull().sum()
Out[6]:
content    4
score      0
dtype: int64
In [7]:
# Drop the rows with missing review text found above.
data=data.dropna()
In [8]:
# English stopwords from NLTK, consulted by clean() below.
stopword=set(stopwords.words('english'))
In [9]:
def clean(text):
    """Normalize a raw review for downstream word clouds / sentiment scoring.

    Lowercases, strips bracketed spans, URLs, HTML tags, punctuation,
    newlines and digit-bearing words, removes English stopwords, then
    stems each remaining word.

    Parameters
    ----------
    text : any
        Raw review content; coerced to ``str`` first.

    Returns
    -------
    str
        Space-joined, stemmed, stopword-free tokens.
    """
    text = str(text).lower()
    # Raw strings: the originals ('\[', '\S', '\w', '\d') are invalid
    # string escapes and raise SyntaxWarning on Python 3.12+.
    text = re.sub(r'\[.*?\]', '', text)               # [bracketed] spans
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # URLs
    text = re.sub(r'<.*?>+', '', text)                 # HTML tags
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)               # words containing digits
    # split() (not split(' ')) so runs of spaces left by the removals above
    # do not yield empty tokens / doubled spaces in the result.
    words = [word for word in text.split() if word not in stopword]
    # Stem each surviving word and rejoin into a single string.
    return " ".join(stemmer.stem(word) for word in words)
In [10]:
# Normalize every review in place (the raw text is discarded).
data['content']=data['content'].apply(clean)
In [11]:
# Donut chart of the star-rating distribution.
ratings=data['score'].value_counts()
numbers=ratings.index    # the distinct ratings (1-5)
quantity=ratings.values  # how many reviews gave each rating
# NOTE(review): this import belongs in the top imports cell for clean re-runs.
import plotly.express as px
figure=px.pie(data,values=quantity,names=numbers,hole=0.5)
figure.show()
In [12]:
# Word cloud over the full cleaned corpus.
# Join with a space: the original ''.join fused the last word of one review
# onto the first word of the next, manufacturing bogus tokens.
text = ' '.join(data.content)
# Distinct name so the `nltk.corpus.stopwords` module import from the top
# cell is not shadowed by this set.
cloud_stopwords = set(STOPWORDS)
wordcloud = WordCloud(stopwords=cloud_stopwords, background_color='white').generate(text)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
In [13]:
# Spot-check the cleaned/stemmed text.
data.head()
Out[13]:
content score
0 good 5
1 awesom app mani peopl easier fb girl awesom gu... 5
2 bad 5
3 good 2
4 interest app 5
In [15]:
# Score each cleaned review with VADER.
nltk.download('vader_lexicon')
sentiments=SentimentIntensityAnalyzer()
# Score each review once and fan the dict out — the original called
# polarity_scores() three times per row for identical results.
scores=[sentiments.polarity_scores(text) for text in data['content']]
data['Positive']=[s['pos'] for s in scores]
data['Negative']=[s['neg'] for s in scores]
data['Neutral']=[s['neu'] for s in scores]
# Keep only the text plus its sentiment components for the word clouds below
# (the star rating is dropped here, as in the original flow).
data=data[['content','Positive','Negative','Neutral']]
data.head()
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Rakesh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Out[15]:
content Positive Negative Neutral
0 good 1.000 0.0 0.000
1 awesom app mani peopl easier fb girl awesom gu... 0.381 0.0 0.619
2 bad 0.000 1.0 0.000
3 good 1.000 0.0 0.000
4 interest app 0.750 0.0 0.250
In [16]:
# Word cloud of reviews scored more positive than negative.
# ' '.join (not ''.join) keeps adjacent reviews from fusing into one token —
# matches the join used for the negative cloud below.
positive=' '.join(data['content'][data['Positive']>data['Negative']])
stopwords=set(STOPWORDS)
wordcloud=WordCloud(stopwords=stopwords,background_color='white').generate(positive)
plt.figure(figsize=(15,10))
plt.imshow(wordcloud,interpolation='bilinear')
plt.axis('off')
# Render the figure instead of leaking the axis-limits tuple as cell output.
plt.show()
Out[16]:
(-0.5, 399.5, 199.5, -0.5)
In [17]:
# Word cloud of reviews scored more negative than positive.
negative = ' '.join(data['content'][data['Negative'] > data["Positive"]])
stopwords = set(STOPWORDS)
# BUG FIX: the original generated this cloud from `positive`, so the
# "negative" word cloud actually displayed the positive reviews' words.
wordcloud = WordCloud(stopwords=stopwords, background_color="white").generate(negative)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
# Render the figure instead of leaking the axis-limits tuple as cell output.
plt.show()
Out[17]:
(-0.5, 399.5, 199.5, -0.5)
In [ ]: